
*** Data cleaning
* Session setup: start from an empty session and disable paging, pausing is
* enabled, and command tracing is switched off (trace depth limited to 1).
clear all
set more off

pause on

set trace off
set tracedepth 1

* Show the current user name in the log
display "`c(username)'"
	

**************************************************************************
****************** OPENING DATA SET
**************************************************************************

* Load the household panel
use "$data_folder/NBER_HPI_PANEL.dta", clear

* Drop observations whose ubigeo is one of the string codes 777 / 888 / 999
* (the original comment calls these "missing ubigeo")
drop if inlist(ubigeo, "777", "888", "999")

* Drop observations whose 22-sector or 9-sector classification is "Missing"
drop if inlist("Missing", sector22, sector9)

**************************************************************************
****************** MERGING
**************************************************************************

**ENAHO (Employment variation by sector)

*1) Second quarter of 2020 relative to first quarter of 2020
// preserve
// 	*import excel using "$data_folder/changes_employment_sector.xlsx", first clear
// 	use "$data_folder/changes_employment_sector.dta", clear
// 	rename med median_sector
// 	rename sector_number sector22_id
// 	destring sector22_id, replace
// 	tempfile nacho
// 	save `nacho'
// restore

*2) Second quarter of 2020 relative to 2019 (whole year)
* Build a sector-level lookup (sector22_id, pct_chng_wgt_v2) in a tempfile
* without disturbing the panel that is currently in memory.
tempfile nacho2
preserve
	// alternative source kept for reference:
	// import excel using "$data_folder/changes_employment_sector.xlsx", first clear
	use "$data_folder/changes_employment_sector_v2.dta", clear

	// the merge key must be numeric and share the panel's name
	rename sector_number sector22_id
	destring sector22_id, replace

	// keep only the key and the weighted percent change, suffixed _v2
	keep sector22_id pct_chng_wgt
	rename pct_chng_wgt pct_chng_wgt_v2

	save `nacho2'
restore

// *3) Second quarter of 2020 relative to 2019 (quarte2)
// preserve
// 	*import excel using "$data_folder/changes_employment_sector.xlsx", first clear
// 	use "$data_folder/changes_employment_sector_v3.dta", clear
// 	rename med median_sector
// 	rename sector_number sector22_id
// 	destring sector22_id, replace
// 	keep sector22_id median_sector pct_chng_wgt
// 	rename median_sector median_sector_v3
// 	rename pct_chng_wgt pct_chng_wgt_v3
// 	tempfile nacho3
// 	save `nacho3'
// restore


/*NOTE
V2 has 4 different sectors that switched
*i) From 1 --> 0
Pesca y acuicultura 
Suministro de agua; evacuación de aguas residuales, gestión de desechos y descontaminación

*ii) From 0 --> 1
Actividades inmobiliarias
Actividades de servicios administrativos y de apoyo
*/

// Attach the v2 sector employment change to every panel row; rows found only
// in the lookup are discarded and no _merge variable is kept.
// merge m:1 sector22_id using "`nacho'", nogen keep(1 3)
merge m:1 sector22_id using "`nacho2'", keep(master match) nogenerate
// merge m:1 sector22_id using "`nacho3'", nogen keep(1 3)




*** Merging with the Bartik instrument
* Add the log of the national Bartik measure to the instrument file, then
* attach it to the panel by district (ubigeo) and year. Panel rows without a
* match are kept; instrument-only rows are dropped.
tempfile bartik_tmp
preserve
	use "$outcomes_bartik/bartik.dta", clear
	generate ln_bartik_national = ln(bartik_national)
	save `bartik_tmp'
restore

merge m:1 ubigeo year using "`bartik_tmp'", keep(master match) nogenerate


*ii) Merging with work-from-home shares
* The lookup is keyed on the 1-digit ISCO code, so the panel's occupation
* variable is renamed to match before merging. Unmatched lookup rows are dropped.
rename ciuo1_digit isco_1dig
merge m:1 isco_1dig using "$wfh/wfh_peru_1digits.dta", keep(master match) nogenerate

* "median" comes from the lookup file; give it a self-describing name
rename median median_wfh


// merging with nightlight and NO2 panel
// from do "$do_folder_nlno2/02 Merging No2 Nightlight and Bartik.do"
// merge m:1 ubigeo year using "$data_folder/no2_nightligh_data_tomerge_with_sample.dta", gen(_no2_merge) keep(1 3)

// from do "$do_folder_nlno2/02_a Get Q1 Q2 changes.do"
// merge m:1 ubigeo using "$data_folder/bartick_no2_q2q1_changes.dta", gen(_q2q1_merge) keep(1 3)


// Attach district-by-year covid counts; panel rows with no covid record are kept
merge m:1 ubigeo year using "$data_folder/covid_ubigeo.dta", keep(master match) generate(_covid_merge)

// Missing counts are recoded to zero
mvencode deaths cases, mv(0) override

// For each count build its log and an indicator for a strictly positive count.
// NOTE(review): the log of a zero count is missing, so ln_deaths / ln_cases are
// missing wherever the count is 0 -- confirm this is intended.
foreach outcome of varlist deaths cases {
	generate ln_`outcome' = ln(`outcome')
	generate any_`outcome' = (`outcome' > 0)
}


// Merge in the June 2020 deaths (file produced by
// 02_dofiles/09 covid data/1_get_july_deaths.do)
merge m:1 ubigeo using "$data_folder/covid_july_deaths.dta", keep(master match) nogenerate

// Districts with no record get zero deaths and a zero death rate
foreach jvar in june_deaths june_death_rate {
	replace `jvar' = 0 if `jvar' == .
}





// TODO: convert the covid death counts to rates per 100,000 people (not implemented yet)


**************************************************************************
****************** Adding baseline hpi data 
**************************************************************************


// Attach the de-identified baseline HPI survey by caseid; baseline-only rows are
// dropped and the match status is stored in hpi_id_merge.
merge m:1 caseid using "$data_folder/survey_data/baseline_hpi_deidentified.dta", keep(1 3) generate(hpi_id_merge)
//
// stop
// **Merging with inei
// ***First we need to know id (H_P208) of inei data. So we merge with baseline hpi
// merge m:1 caseid using "$data_folder/survey_data/PrimeraRonda_HPI_Covid_pre_checked_completed.dta", keep(master matched) keepusing(caseid H_P208) gen(hpi_id_merge)
//
// *Now we merge with inei data
// ///Merging with Baseline Survey
// 	preserve
// 		use "$data_inei/HPI_LB_HM_merged.dta", clear
// 		keep if H_RESULT == 1
// 		gen nombre_completo=lower(H_P13_NOM)
// 		*DNI hombre
// 		sort H_P208
// 		duplicates tag H_P208, gen (H_dup)
//
// 		*Id inputed
// 		replace H_P208="99999991" if nombre_completo=="cesar francisco zurita barrera"
// 		replace H_P208="99999992" if nombre_completo=="carlos luis brito velasquez"
// 		replace H_P208="99999995" if nombre_completo=="carlton monasterios herrera"
//
// 		**Missing
// 		replace H_P208="999_1" if H_P208=="999" & nombre_completo=="carlos dario lugo rojas"
// 		replace H_P208="999_2" if H_P208=="999" & nombre_completo=="jabes ramon repillosa machado"
//
// 		*Keeping important variables only
// 		keep H_P208 /*M_P702 M_P703 M_P704*/ M_P705 M_P707 M_P708 M_P709 M_P710
// 		 /*
// 		 M_P705 -> ipv_v3_1 
// 		 M_P707 -> ipv_v5_1 
// 		 M_P708 -> ipv_v6_1
// 		 M_P709 -> ipv_v7_1 
// 		 M_P710 -> ipv_v8_1
// 		 */
//		 
// 		 //705 is hummillarla
// 		recode M_P705 (1 = 0 "Nunca") (2 = 1 "1 vez") (3 4 = 2 "2 veces"), gen (ipv3_inei)
// 		// 707 es sacudir, empujar o tirar algo 
// 		recode M_P707 (1 = 0 "Nunca") (2 = 1 "1 vez") (3 4 = 2 "2 veces"), gen (ipv5_inei)
// 		//708 bofeteo, retorcio el brazo 
// 		recode M_P708 (1 = 0 "Nunca") (2 = 1 "1 vez") (3 4 = 2 "2 veces"), gen (ipv6_inei)
// 		// 709  golpeo con puño 
// 		recode M_P709 (1 = 0 "Nunca") (2 = 1 "1 vez") (3 4 = 2 "2 veces"), gen (ipv7_inei)
// 		// 710 fuerza fisica para relaciones sexuales. 
// 		recode M_P710 (1 = 0 "Nunca") (2 = 1 "1 vez") (3 4 = 2 "2 veces"), gen (ipv8_inei)
//		
// // 		la var ipv1_reg "Args"
// // 		la var ipv2_reg "Contrl.B."
// // 		la var ipv3_reg "Psychological"
// // 		la var ipv4_reg "Physical"
// // 		la var ipv5_reg "Any"		
//
// 		drop M_P705 M_P707 M_P708 M_P709 M_P710 
//		
// 		gen hpi_inei_sample = 1
// 		tempfile a1
// 		save `a1'
// 	restore
//
// */
// merge m:1 H_P208 using `a1' , gen(hpi_data_merge) keep(1 3)







**************************************************************************
****************** VARIABLES
**************************************************************************


// Top-code the IPV frequency items at 2 ("two or more").
// Some respondents have values above 2 because of older iterations of the questions.
// Stata orders every missing value (., .a, ..., .z) above all numbers, and
// ".r != ." is true, so comparing against "." alone would recode extended
// missing codes (this file uses .r and .d elsewhere) to 2. missing() excludes
// all of them.
foreach var of varlist ipv?_reg ipv?_inei {

	replace `var' = 2 if `var' >= 2 & !missing(`var')
}


**Combining the IPV items into aggregate categories
// NOTE(review): egen rowtotal() treats missing components as 0, so each total
// below is 0 (not missing) when every component is missing -- confirm intended.


//ipv3_reg: humiliated her
//ipv4_reg: insulted her
//ipv5_reg: pushed, shook or threw something
//ipv6_reg: slapped or twisted her arm
//ipv7_reg: hit with a fist
//ipv8_reg: physical force to obtain sex

egen ipv34_reg=rowtotal(ipv3_reg ipv4_reg)
egen ipv5678_reg=rowtotal(ipv5_reg ipv6_reg ipv7_reg ipv8_reg)
egen ipv_any=rowtotal(ipv1_reg ipv2_reg ipv3_reg ipv4_reg ipv5_reg ipv6_reg ipv7_reg ipv8_reg)



// rename ipv34_reg ipv3_reg // this is psychological 
// rename ipv5678_reg ipv4_reg  // this is physical 
// rename ipv_any ipv5_reg // this is any


// INEI does not have question 4, so only the physical/sexual total is built for it
egen ipv5678_inei=rowtotal(ipv5_inei ipv6_inei ipv7_inei ipv8_inei)


// Period indicators keyed on the panel year.
// NOTE(review): post_julaug is year==2021 but is later labelled "July-August (2020)" -- confirm the year coding.
gen post_aprilmay= (year==2020)
gen post_julaug= (year==2021)






// Interaction of the work-from-home share with the log national Bartik measure
generate wfh_bartik = median_wfh * ln_bartik_national

// Individual incomes in logs
generate log_income_woman = log(income_woman_mid)
generate log_income_man = log(income_man_mid)

// Household income is the row total of both partners' incomes (a missing
// component counts as 0 in rowtotal), then its log and inverse hyperbolic sine
egen income_hh = rowtotal(income_woman_mid income_man_mid)
generate log_income_hh = log(income_hh)
generate ihs_income_hh = asinh(income_hh)

sort id_cases year

// 1 when the household reports strictly positive income, 0 otherwise
generate income_dummy = (income_hh > 0) & income_hh != .

// Row total of the shopping and socialize variables
egen days_out = rowtotal(shopping socialize)



// Household-level flag: 1 if the household has zero income in the 2020 wave
bysort id_cases: egen noIncQ2 = max((income_hh == 0) & (year == 2020))

// Percent change in household income, 2020 wave versus 2019, computed on a
// wide copy of the panel and merged back onto every row of the household
tempfile hh_income_wide
preserve
	keep id_cases income_hh year
	reshape wide income_hh, i(id_cases) j(year)
	generate income_hh_q22019 = (income_hh2020 - income_hh2019) / income_hh2019 * 100
	save `hh_income_wide'
restore

// every panel row must find its household in the wide file
merge m:1 id_cases using `hh_income_wide', assert(match) nogenerate

// district-level employment changes, from $do_folder_bartik/03 Bartik_instrument.do
merge m:1 ubigeo using "$data_folder/bartick_emp_changes.dta", keep(master match) generate(bar_changes_merge)



**Additional controls (Javier)
*i) Years of education (woman and partner)
* The same mapping is applied to both people:
*   educ == 1            -> 0 years
*   educ == 3 (primary)  -> grade reached, 1 to 6
*   educ == 4 (secondary)-> 6 + grade reached, 1 to 5
*   educ == 5 / 6 / 7    -> 14 / 16 / 18 years (lower bounds)
* Any other combination (including educ == 2) is left missing.
foreach person in woman partner {

	generate schooling_`person' = 0 if educ_`person' == 1

	// primary: years equal the reported grade
	replace schooling_`person' = educ_`person'_grade_prim ///
		if educ_`person' == 3 & inlist(educ_`person'_grade_prim, 1, 2, 3, 4, 5, 6)

	// secondary: six years of primary plus the reported grade
	replace schooling_`person' = 6 + educ_`person'_grade_sec ///
		if educ_`person' == 4 & inlist(educ_`person'_grade_sec, 1, 2, 3, 4, 5)

	// higher levels: lower bounds
	replace schooling_`person' = 6 + 5 + 3 if educ_`person' == 5
	replace schooling_`person' = 6 + 5 + 5 if educ_`person' == 6
	replace schooling_`person' = 6 + 5 + 7 if educ_`person' == 7
}

*ii) Mother tongue 
//mother_tongue


*iii) Age dummies (and partner’s)
// age
// age_partner


*iv) HH structure
// hh_members
// hh_adults
// children_noscholar
** Overcrowding
* crowded = 1 when there are more than 3 household members per room, 0 otherwise,
* and missing whenever either input is missing.
* Stata treats a missing value as larger than any number, so without the
* missing() guard a system-missing input would produce a missing ratio that
* counts as "> 3" and be coded as crowded. The guard covers every missing code
* (., .r, .d, ...).
* A room count of zero still yields a missing ratio and therefore stays coded
* as crowded, as before.
gen crowded = (hh_members/hh_rooms_lockdown > 3.0) if !missing(hh_members, hh_rooms_lockdown)


*v) Pre pandemic type of work (if available? X time dummies) à wage employees receive a “gratificación” in Peru
// benefits_april_may
// benefits_april_may_value



// Panel versions of the household covid indicators: the reported value in the
// 2020 wave, zero in the 2019 wave, missing in any other year
generate med_covid_panel = cond(year == 2019, 0, medicalcare_covid) if inlist(year, 2019, 2020)

generate hard_covid_panel = cond(year == 2019, 0, hard_covid) if inlist(year, 2019, 2020)

// Shorter names for the Bartik change measures
rename bartikq2q1 barChnQ2q1
rename bartikq219 barChnQ219

// Rescale the mental health outcomes to -1 / 0 / 1 by subtracting 2, keep them
// only for the 2020 wave, and detach their value labels (which no longer apply)
local mh_outcomes anxiety sentimental loneliness rage raise_voice violence

foreach mh of local mh_outcomes {
	replace `mh' = `mh' - 2
	replace `mh' = . if year != 2020
	label values `mh'
}

// Largest of the six rescaled outcomes within each row
egen any_mental_health_worse = rowmax(`mh_outcomes')

// bono: clean 0/1 dummy for government benefits; anything other than 0 or 1
// in the source variable stays missing
generate bono = cond(benefits_april_may == 1, 1, cond(benefits_april_may == 0, 0, .))
** Variable labels (several contain LaTeX; "\$" keeps Stata from expanding "$" as a global macro)
label variable year "Period"

label variable barChnQ2q1 "\$Z_{dt}\$ with Changes"
label variable barChnQ219 "\$Z_{dt}\$ with Changes"

label variable dummy_time "Post"
label variable post_aprilmay "April-May (2020)"
label variable post_julaug "July-August (2020)"

label variable ipv34_reg "Psychological"
label variable ipv5678_reg "Physical/Sexual"
label variable ipv_any "Any"

label variable pct_chng_wgt "\$ g^{\text{sector}}_i\$"
label variable pct_chng_wgt_v2 "\$ g^{\text{sector}}_i\$"

// this replaces the label given to barChnQ219 a few lines above
label variable barChnQ219 "\$g^{\text{shift-share}}_i\$"

** Additional controls (Javier)
label variable schooling_woman "Years of education woman"
label variable schooling_partner "Years of education partner’s"
label variable mother_tongue "Mother tongue"
label variable age "Age"
label variable age_partner "Age(partner’s)"
label variable hh_members "HH size during quarantine"
label variable hh_adults "HH adults during quarantine"
label variable children_noscholar "HH children under 4 years during quarantine"
label variable crowded "Overcrowding"
label variable benefits_april_may "Government benefits(dummy)"
label variable benefits_april_may_value "Government benefits(soles)"

label variable hard_covid_panel "Covid Hospi."
label variable med_covid_panel "Covid HH"



// Analysis names for the violence totals: physical/sexual, psychological,
// and their sum
generate phy_violence = ipv5678_reg
generate psy_violence = ipv34_reg
// gen cont_behavior = ipv2_reg
// gen arguments = ipv1_reg
generate any_violence = phy_violence + psy_violence

// la var arguments "Args"
// la var cont_behavior "Contrl.B."
label variable psy_violence "Psychological"
label variable phy_violence "Physical/Sexual"
label variable any_violence "Any"


label variable cases "Covid Cases"
label variable deaths "Covid Death"

// Auto-label every ln_* and any_* variable from its own name: proper-case it
// and turn underscores into spaces (e.g. ln_deaths becomes "Ln Deaths").
// NOTE(review): the any_* pattern also matches any_violence, so a label set on
// it earlier is overwritten here -- confirm intended.
foreach v of varlist ln_* any_* {
	local pretty = subinstr(strproper("`v'"), "_", " ", .)
	label variable `v' "`pretty'"
}





// Combined psychological + physical violence total
generate psPhy_violence = phy_violence + psy_violence
label variable psPhy_violence "Psych. + Physi"

// Relabel the covid medical care variable and detach its value label
label variable medicalcare_covid "COVID"
label values medicalcare_covid

// Short name for the 2020 versus 2019 household income change
rename income_hh_q22019 incq219




// Label each mental health variable with its own name, proper-cased and with
// underscores replaced by spaces
foreach mh in anxiety sentimental loneliness rage raise_voice violence {
	local pretty = subinstr(strproper("`mh'"), "_", " ", .)
	label variable `mh' "`pretty'"
}

label variable any_mental_health_worse "\specialcell{Any\\Mental Health}"
rename any_mental_health_worse any_mental

// 0/1 indicators for a strictly positive violence total (missing stays missing)
foreach total in /*arguments cont_behavior*/ psy_violence phy_violence any_violence {
	generate dum`total' = (`total' > 0) if `total' != .
}


// Baseline income: within caseid, the maximum of income_hh taken over 2019 rows
// (rows from other years contribute 0)
bysort caseid: egen base_inc = max(income_hh * (year == 2019))

// Give the violence dummies short names, then label them
// ren dumarguments dArgs
// ren dumcont_behavior dCont
rename dumpsy_violence dPsy
rename dumphy_violence dPhy
rename dumany_violence dAny

// la var dArgs "Args"
// la var dCont "Contrl.B."
label variable dPsy "Psychological"
label variable dPhy "Physical/Sexual"
label variable dAny "Any"

label variable sentimental "Moodiness"

label variable june_death_rate "June Death Rate"

// Multi-line labels for the income variables. They rely on a LaTeX macro,
// \specialcell, that is inserted in every table produced downstream; see
// https://tex.stackexchange.com/questions/2441/how-to-add-a-forced-line-break-inside-a-table-cell/19678#19678

label variable income_hh "\specialcell{Income\\Levels}"
label variable log_income_hh "\specialcell{Income\\Log}"
label variable income_dummy "\specialcell{Income\\$>0$}"
label variable ihs_income_hh "\specialcell{Income\\IHS}"
label variable days_out "\specialcell{Wife's Days\\Out}"

label variable income_woman_mid "\specialcell{Wife's\\Income}"
label variable income_man_mid "\specialcell{Husband's\\Income}"
// replaces the income_hh label assigned a few lines above
label variable income_hh "\specialcell{Household's\\Income}"

// Each partner's share of household income, in percent (missing when the
// household total is zero or the partner's income is missing)
gen prop_inc_man = income_man_mid / income_hh * 100
gen prop_inc_wom = income_woman_mid / income_hh * 100

la var prop_inc_man "\specialcell{Male Inc. \\ Percentage}"

// Household-level flag: 1 if the combined psychological + physical total was
// positive in a 2019 row
bys id_cases: egen basepsyPhyVio = max( (psPhy_violence * (year == 2019)) > 0  )

la var basepsyPhyVio "Psy. or Phys. IPV (2019)"

la var savings "\specialcell{Used Savings \\ Apr. May (2020)}"


// Male top earner, defined on 2019 rows only.
// Stata ranks missing values above every number, so an unguarded comparison
// would code a missing male income as "man earns more" (1) and a missing
// female income as 0. The indicator is left missing unless both incomes are
// observed.
gen man_earner = (income_man_mid>income_woman_mid) if year==2019 & !missing(income_man_mid, income_woman_mid)
la var man_earner "Male Top Earner (2019)"





// Save the cleaned panel with the Bartik variables
save "$data_folder/NBER_HPI_PANEL_with_bartick.dta", replace


// Add the sampling weights (file produced by /HRI - MIMP/01_ProjectDevelopment/03 Covid Activities/04. Women Survey/03_Survey/Data Management/00 HFC/02_dofiles/02_Descriptive statistics/03_descriptive_stats_updated.do)
// and save a second copy; weight-only rows are dropped
merge m:1 id_cases using "$data_folder/nber_sample_with_weights.dta", keep(master match) generate(weight_merge)

save "$data_folder/NBER_HPI_PANEL_with_bartick_and_weights.dta", replace



















